merging background and presence data into one giant dataset, and timesliced subsets
A note to anyone who might happen to stumble across this… I am a beginner in R and have had no exposure to similar languages. I don’t know what I’m doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.
Built with R version `r getRversion()`.
You can load the required packages using the following code, which uses a function called ipak. Note that this function first checks whether the packages are installed.
# Packages needed by this script; plyr provides rbind.fill(), used for the merge.
packages <- "plyr"
# ipak() comes from a project helper script; it is called here to load
# everything listed in `packages`.
source("./src/ipak.R")
ipak(packages)
plyr
TRUE
read in the presence and background
# Presence records (CSV with a header row).
presencemerging <- read.csv(
  file = "../output/bio/presence_points_without_envdata_relooped_glbathy_nodup.csv",
  header = TRUE
)
# Background records (CSV with a header row).
backgroundmerging <- read.csv(
  file = "../output/bio/background_complete_obs_cels_globot_cellid_xyzt.csv",
  header = TRUE
)
now check
# List the column names of the presence data frame.
names(presencemerging)
[1] "cell_id" "year" "month" "depthlayerno" "id" "decimalLatitude" "decimalLongitude"
[8] "datecollected" "institutioncode" "individualcount" "depth" "resname" "originalscientificname" "collectioncode"
[15] "day" "occurrence" "nafo_zone" "gear" "longitude_meters" "latitude_meters" "amo_sample"
[22] "amo_prev" "amo_winter" "depth_layer" "bottom_depth" "total_cell_obs" "yymm_cell_obs" "chl_surface"
[29] "chl_depth" "mlp_surface" "o2_surface" "o2_depth" "salinity_surface" "salinity_depth" "ssh_surface"
[36] "temp_surface" "temp_depth" "nao_sample" "nao_prev" "nao_winter" "XXtotal_cell_obs_xyzt" "temp_celsius_depth"
[43] "temp_celsius_surface" "longitude_meters.1" "latitude_meters.1" "bottom_depth_glorys" "longitude_meters.2" "latitude_meters.2" "cell_id_3d"
[50] "cell_id_xyzt" "total_cell_obs_xyzt"
# List the column names of the background data frame.
names(backgroundmerging)
[1] "cell_id" "year" "month" "depthlayerno" "X" "longitude_meters" "latitude_meters"
[8] "decimalLongitude" "decimalLatitude" "nafo_zone" "id" "originalscientificname" "amo_sample" "amo_prev"
[15] "amo_winter" "nao_sample" "nao_prev" "nao_winter" "depth_layer" "bottom_depth" "total_cell_obs_xy"
[22] "total_cell_obs_xyt" "total_cell_obs_xyzt" "temp_depth" "temp_surface" "salinity_depth" "salinity_surface" "chl_depth"
[29] "chl_surface" "o2_depth" "o2_surface" "mlp_surface" "ssh_surface" "longitude_meters.1" "latitude_meters.1"
[36] "optional" "occurrence" "temp_celsius_depth" "temp_celsius_surface" "bottom_depth_glorys" "longitude_meters.2" "latitude_meters.2"
[43] "optional.1" "cell_id_3d" "cell_id_xyzt"
ok so the two data frames need a spot of cleaning
# Drop the suffixed coordinate columns (".1"/".2") and the "XX"-prefixed count
# column from the presence data.
# Columns are matched by name with %in% instead of subset(select = -c(...)):
# subset() aborts with "object '<column>' not found" if any listed column is
# missing, whereas this form simply skips columns that are already absent.
presencemerging <- presencemerging[
  ,
  !(names(presencemerging) %in% c(
    "longitude_meters.1", "latitude_meters.1",
    "longitude_meters.2", "latitude_meters.2",
    "XXtotal_cell_obs_xyzt"
  )),
  drop = FALSE  # stay a data frame even if only one column remains
]
Error in eval(substitute(select), nl, parent.frame()) :
object 'longitude_meters.1' not found
for consistency, rename a couple of presencemerging columns
# Rename the presence count columns to the names backgroundmerging uses
# (total_cell_obs_xy / total_cell_obs_xyt). A name that is not present is
# left alone.
colnames(presencemerging)[
  which(colnames(presencemerging) == "total_cell_obs")
] <- "total_cell_obs_xy"
colnames(presencemerging)[
  which(colnames(presencemerging) == "yymm_cell_obs")
] <- "total_cell_obs_xyt"
now merge the two datasets
# Stack presence and background rows; rbind.fill() fills columns that exist in
# only one of the two data frames with NA.
presab <- rbind.fill(presencemerging, backgroundmerging)
# Save the combined table without a row-name column.
write.csv(x = presab, file = "../output/bio/presab.csv", row.names = FALSE)
# Preview the first six rows.
head(presab, n = 6L)
lovely jubley